library(timetk)
library(inspectdf)
library(janitor)
library(readr)
library(dplyr)
library(readr)
library(ggplot2)
library(naniar)
library(packcircles)
library(ggridges)
library(ggbeeswarm)
library(patchwork)
library(dplyr)
library(tidyr)
library(readr)
library(skimr)
library(purrr)
library(stringr)
library(urltools)
library(magrittr)
library(lubridate)
library(janitor)
library(tseries)
Az adatsor Tetouan City 2017-es áramfogyasztásának alakulását írja le 10 percenkénti bontásban 3 zónára.
Salam, A., & El Hibaoui, A. (2018, December). Comparison of Machine Learning Algorithms for the Power Consumption Prediction:-Case Study of Tetouan city–. In 2018 6th International Renewable and Sustainable Energy Conference (IRSEC) (pp. 1-5). IEEE
az áramfogyasztás előrejelzése idősor elemzéssel
url <-
"https://raw.githubusercontent.com/fzksmrk/adatbanyaszati-beadando-2/main/data.csv"
data <- readr::read_csv(
url,
col_types = cols(
DateTime = col_datetime(format = "%m/%d/%Y %H:%M"),
Temperature = col_double(),
Humidity = col_double(),
`Wind Speed` = col_double(),
`general diffuse flows` = col_double(),
`diffuse flows` = col_double(),
`Zone 1 Power Consumption` = col_double(),
`Zone 2 Power Consumption` = col_double(),
`Zone 3 Power Consumption` = col_double()
)
)
data
data <- janitor::clean_names(data)
data <- dplyr::rename(data,"zone_1" = "zone_1_power_consumption","zone_2" = "zone_2_power_consumption","zone_3" = "zone_3_power_consumption")
data
data <- data %>%
mutate(data, zone_total = zone_1 + zone_2 + zone_3)
data <- data %>%
timetk::tk_augment_timeseries_signature(date_time) %>%
select(
-matches(
"(half)|(mday)|(qday)|(mday)|(yday)|(mweek)|(xts)|(second)|(minute)|(iso)|(num)|(hour12)|(am.pm)|(week\\d)|(mday7)"
)
) %>%
select(-diff,-wday) %>%
mutate(hour = factor(hour, ordered = TRUE))
inspectdf::inspect_num(data)
data %>%
ggplot(aes(date_time, temperature)) +
geom_line() +
labs(
x = NULL,
y = "Temperature"
) -> p1
data %>%
ggplot(aes(date_time, humidity)) +
geom_line() +
labs(
x = NULL,
y = "Humidity"
) -> p2
data %>%
ggplot(aes(date_time, wind_speed)) +
geom_line() +
labs(
x = NULL,
y = "Wind Speed"
) -> p3
data %>%
select(date_time, contains("flows")) %>%
pivot_longer(-date_time) %>%
ggplot(aes(date_time, value)) +
geom_line(aes(color = name)) +
labs(
x = NULL,
y = "Flows",
color = "Flows"
) -> p4
p1 / p2 / p3 / p4
data %>%
select(date_time, contains("zone")) %>%
pivot_longer(-date_time) %>%
ggplot(aes(date_time, value)) +
geom_line(aes(color = name)) +
scale_y_continuous(labels = scales::label_number_si()) +
labs(
x = NULL,
y = "Power",
color = "Zone"
)
boxplot(data$zone_1, main = "Zone 1", ylab = "Value")
boxplot(data$zone_2, main = "Zone 2", ylab = "Value")
boxplot(data$zone_3, main = "Zone 3", ylab = "Value")
boxplot(data$zone_total, main = "Zone Total", ylab = "Value")
data %>%
select(hour, zone_1,zone_2,zone_3) %>%
pivot_longer(-hour) %>%
ggplot(aes(hour, value)) +
geom_boxplot(aes(color = name),
outlier.alpha = 0.1) -> p1
data %>%
select(wday.lbl, zone_1,zone_2,zone_3) %>%
pivot_longer(-wday.lbl) %>%
ggplot(aes(wday.lbl, value)) +
geom_boxplot(aes(color = name),
outlier.alpha = 0.1) -> p2
data %>%
select(month.lbl, zone_1,zone_2,zone_3) %>%
pivot_longer(-month.lbl) %>%
ggplot(aes(month.lbl, value)) +
geom_boxplot(aes(color = name),
outlier.alpha = 0.1) -> p3
p1/ p2 / p3
decompose_ts <- function(series, freq = 144) {
ts_obj <- ts(series, frequency = freq)
decompose(ts_obj)
}
plot(decompose_ts(data$zone_1))
plot(decompose_ts(data$zone_2))
plot(decompose_ts(data$zone_3))
plot(decompose_ts(data$temperature))
plot(decompose_ts(data$wind_speed))
plot(decompose_ts(data$humidity))
plot(decompose_ts(data$general_diffuse_flows))
plot(decompose_ts(data$diffuse_flows))
az áramfogyasztás előrejelzése idősor elemzéssel
Hozzuk létre az idősorunkat
zone_1_ts <- ts(data$zone_1, frequency = 144)
Elemezzük, hogy az adatunk stacionárius-e
adf_test <- adf.test(zone_1_ts)
## Warning in adf.test(zone_1_ts): p-value smaller than printed p-value
print(adf_test$p.value)
## [1] 0.01
Mivel a p érték kevesebb, mint 0.05, ezért a nullhipotézist elvethetjük és elfogadhatjuk, hogy az adatunk stacionárius
acf(zone_1_ts)
pacf(zone_1_ts)
library(forecast)
fit <- auto.arima(zone_1_ts, seasonal = TRUE)
summary(fit)
## Series: zone_1_ts
## ARIMA(3,0,0)(0,1,0)[144]
##
## Coefficients:
## ar1 ar2 ar3
## 1.0507 -0.1566 0.0816
## s.e. 0.0044 0.0063 0.0044
##
## sigma^2 = 187691: log likelihood = -391528.2
## AIC=783064.5 AICc=783064.5 BIC=783100
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.162183 432.6257 264.0267 -0.01083862 0.8469455 0.2029934
## ACF1
## Training set -0.0007429365
forecast_values <- forecast(fit, h = 77*2*5)
lasts <- tail(zone_1_ts, 77*2*10)
# Create a data frame for the historical data
history_df <- data.frame(Time = time(lasts), Value = as.numeric(lasts), Lo95 = NA, Hi95 = NA, Type = "Historical")
# Create a data frame for the forecasted data
forecast_df <- data.frame(Time = time(forecast_values$mean),
Value = as.numeric(forecast_values$mean),
Lo95 = as.numeric(forecast_values$lower[,2]),
Hi95 = as.numeric(forecast_values$upper[,2]),
Type = "Forecast")
# Ensure that 'Time' columns are of the same class
history_df$Time <- as.numeric(history_df$Time)
forecast_df$Time <- as.numeric(forecast_df$Time)
# Combine the historical and forecasted data into a single data frame
combined_df <- rbind(history_df, forecast_df)
# Create a ggplot of the historical and forecasted values
ggplot() +
geom_line(data = combined_df, aes(x = Time, y = Value, color = Type)) +
geom_line(data = forecast_df, aes(x = Time, y = Lo95), linetype = "dotted", color = "blue") +
geom_line(data = forecast_df, aes(x = Time, y = Hi95), linetype = "dotted", color = "blue") +
scale_color_manual(values = c("Historical" = "black", "Forecast" = "red")) +
labs(title = "ARIMA Forecast", x = "Time", y = "Value") +
theme_minimal()
zone_ts <- ts(data$zone_total, frequency = 144)
fit_zone <- auto.arima(zone_ts, seasonal = TRUE)
summary(fit_zone)
## Series: zone_ts
## ARIMA(4,0,0)(0,1,0)[144]
##
## Coefficients:
## ar1 ar2 ar3 ar4
## 1.0733 -0.1280 0.0500 -0.0108
## s.e. 0.0044 0.0064 0.0064 0.0044
##
## sigma^2 = 349552: log likelihood = -407780.2
## AIC=815570.4 AICc=815570.4 BIC=815614.7
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2655549 590.3941 406.7471 -0.004305024 0.5934085 0.1754785
## ACF1
## Training set 4.183627e-05
forecast_values <- forecast(fit_zone, h = 77*2*5)
lasts <- tail(zone_ts, 77*2*10)
# Create a data frame for the historical data
history_df <- data.frame(Time = time(lasts), Value = as.numeric(lasts), Lo95 = NA, Hi95 = NA, Type = "Historical")
# Create a data frame for the forecasted data
forecast_df <- data.frame(Time = time(forecast_values$mean),
Value = as.numeric(forecast_values$mean),
Lo95 = as.numeric(forecast_values$lower[,2]),
Hi95 = as.numeric(forecast_values$upper[,2]),
Type = "Forecast")
# Ensure that 'Time' columns are of the same class
history_df$Time <- as.numeric(history_df$Time)
forecast_df$Time <- as.numeric(forecast_df$Time)
# Combine the historical and forecasted data into a single data frame
combined_df <- rbind(history_df, forecast_df)
# Create a ggplot of the historical and forecasted values
ggplot() +
geom_line(data = combined_df, aes(x = Time, y = Value, color = Type)) +
geom_line(data = forecast_df, aes(x = Time, y = Lo95), linetype = "dotted", color = "blue") +
geom_line(data = forecast_df, aes(x = Time, y = Hi95), linetype = "dotted", color = "blue") +
scale_color_manual(values = c("Historical" = "black", "Forecast" = "red")) +
labs(title = "ARIMA Forecast", x = "Time", y = "Value") +
theme_minimal()